from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy.http import Request
from dmoz.items import DmozItem
from scrapy import log
class DmozSpider(BaseSpider):
    """Spider that starts at http://video.dlut.edu.cn/ and follows links.

    For every response it emits one ``DmozItem`` per anchor text found under
    ``//table/tbody/tr/td/div`` and schedules a new request, handled by the
    same ``parse`` callback, for every anchor ``href`` found there.
    """

    name = "dmoz"
    allowed_domains = ["dlut.edu.cn"]
    start_urls = [
        "http://video.dlut.edu.cn/"
        ]

    def parse(self, response):
        """Yield items for link titles and follow-up requests for link targets.

        Args:
            response: the downloaded page handed in by Scrapy.

        Yields:
            ``DmozItem(title=...)`` for each anchor text, followed by a
            ``Request`` (with ``self.parse`` as callback) for each anchor
            ``href``, resolved against the URL of ``response``.
        """
        # Function-scope import keeps this block self-contained; the fallback
        # covers interpreters where the function lives in ``urlparse``.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        hxs = HtmlXPathSelector(response)
        # NOTE(review): ``tbody`` is often absent from raw HTML (browsers add
        # it when rendering); if this XPath matches nothing, confirm against
        # the actual page source.
        sites = hxs.select('//table/tbody/tr/td/div')
        for t in sites.select('a/text()').extract():
            yield DmozItem(title=t)
        for u in sites.select('a/@href').extract():
            # hrefs may be relative ("/foo", "bar.html"); Request needs an
            # absolute URL, so resolve against the page we are parsing.
            yield Request(urljoin(response.url, u), callback=self.parse)
            
            
